In [7]:
    
from __future__ import division
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from math import e, log
from scipy.optimize import fmin_bfgs
sns.set_style("whitegrid")
%matplotlib inline
    
In [8]:
    
df = pd.read_csv('ex2data2.txt', header=None)
# number of training examples
m = df.shape[0]
# insert a column of ones (the intercept term) as the first column
df.insert(0, '3', np.ones(m))
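ex2data2.txt holds two feature columns and a 0/1 label per row; a quick sanity check (a minimal sketch, not part of the original notebook) that the intercept column landed in front:

In [ ]:
    
# the frame should now have m rows and 4 columns,
# with the freshly inserted first column equal to 1 everywhere
print(df.shape)                     # expected: (m, 4)
print((df.iloc[:, 0] == 1).all())   # expected: True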
    
In [9]:
    
# renaming the columns
df.columns = ['one', 'second', 'third', 'result']
# first ten rows of the data
df.head(10)
    
    Out[9]:
In [10]:
    
# splitting into X (features) and y (labels)
X = df[['one', 'second', 'third']]
y = df['result']
    
In [16]:
    
sns.lmplot(x='second', y='third', data=df, hue='result', size=9, fit_reg=False, scatter_kws={"s": 100})
    
    Out[16]:
    
In [64]:
    
# Plotting the decision boundary: the classifier predicts y = 1 where
# theta' * mapFeature(x) >= 0, so the boundary is the contour z = 0
# (u, v and z are computed in cell In [63] below)
sns.lmplot(x='second', y='third', data=df, hue='result', size=9, fit_reg=False, scatter_kws={"s": 100})
plt.contour(u, v, z, levels=[0])
    
    Out[64]:
    
In [59]:
    
# Initialization
m_row = X.shape[0]
# map the two raw features to polynomial terms (up to degree 6) so the model can fit a non-linear boundary
X_new = mapFeature(X)
m_column = X_new.shape[1]
_lambda = 0
theta = pd.Series(np.zeros(m_column))
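mapFeature (defined in In [19] below) expands the two raw features into every polynomial term up to degree 6, i.e. 1 + 2 + ... + 7 = 28 columns including the bias term; a minimal check of that count, assuming the cell above has run:

In [ ]:
    
# 28 polynomial terms (exponents 0..6), matching the length of theta
assert m_column == sum(i + 1 for i in range(7)) == 28
print(X_new.shape, theta.shape)   # expected: (m, 28) and (28,)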
    
In [60]:
    
# first five entries of the gradient at the initial theta
gradient_function(theta, X_new, y, _lambda)[0:5]
    
    Out[60]:
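Since sigmoid(0) = 0.5, the gradient at theta = 0 with lambda = 0 collapses to (1/m) * X' * (0.5 - y); a small cross-check against that closed form (a sketch, not part of the original run):

In [ ]:
    
# at theta = 0 every hypothesis value is 0.5, so the unregularized
# gradient is simply (1/m) * X^T (0.5 - y)
expected = X_new.T.dot(0.5 - y) / m_row
print(np.allclose(gradient_function(theta, X_new, y, _lambda), expected))   # expected: True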
In [61]:
    
# cost at the initial theta (lambda = 0)
cost_function(theta, X_new, y, _lambda)
    
    Out[61]:
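With theta = 0 every prediction is 0.5, so the unregularized cost should come out as -log(0.5) = log(2) ≈ 0.693 regardless of the data; a quick check under the same assumptions:

In [ ]:
    
# the cost at theta = 0 reduces to -log(0.5) ~= 0.693
print(abs(cost_function(theta, X_new, y, _lambda) - log(2)) < 1e-6)   # expected: True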
In [62]:
    
xopt = fmin_bfgs(f= cost_function,
                 x0= theta,
                 fprime= gradient_function,
                 args=(X_new,y, _lambda),
                 maxiter=400)
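fmin_bfgs minimizes the regularized cost starting from theta = 0 and returns the optimized parameter vector xopt. The same fit can be reproduced through the newer scipy.optimize.minimize interface; the call below is a hedged sketch of an equivalent run, not part of the original notebook:

In [ ]:
    
from scipy.optimize import minimize

# equivalent BFGS run through the generic minimize() interface
res = minimize(cost_function, x0=theta, jac=gradient_function,
               args=(X_new, y, _lambda), method='BFGS',
               options={'maxiter': 400})
xopt_alt = res.x   # should land close to xopt above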
    
    
In [63]:
    
# evaluate theta' * mapFeature(x) over a grid to trace the decision boundary
u = np.linspace(-1, 1.5, 50)
v = np.linspace(-1, 1.5, 50)
z = np.zeros((u.size, v.size))
for i in range(u.size):
    for j in range(v.size):
        # one grid point as a single-row frame with the expected column names
        dd = pd.DataFrame([1, u[i], v[j]]).T
        dd.columns = ['one', 'second', 'third']
        z[i,j] = mapFeature(dd).dot(xopt)
# transpose so contour() sees z[j, i] matching (u[i], v[j])
z = z.T
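The double loop above is easy to follow but slow; the same z grid can be built in one shot by mapping all grid points at once. A vectorized sketch under the same assumptions (u, v, xopt and mapFeature as defined in this notebook):

In [ ]:
    
# build every (u_i, v_j) pair in one frame, map the features once,
# and reshape the resulting scores back onto the grid
uu, vv = np.meshgrid(u, v, indexing='ij')
grid = pd.DataFrame({'second': uu.ravel(), 'third': vv.ravel()})
z_vec = mapFeature(grid).dot(xopt).values.reshape(u.size, v.size).T
print(np.allclose(z_vec, z))   # expected: True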
    
In [19]:
    
# Feature mapping
def mapFeature(X, degree=7):
    # maps the two input features to every polynomial term
    # second^(i-j) * third^j for i = 0..degree-1, j = 0..i
    # (28 columns in total, column 0 being the all-ones bias term)
    count = 0
    X_new = pd.DataFrame(np.ones(X.shape[0]))
    for i in range(degree):
        for j in range(i + 1):
            X_new[count] = ( X['second'] ** (i - j) ) * ( X['third'] ** j )
            count += 1
            
    return X_new
# helper functions
def sigmoid(x):
    return ( 1 / ( 1 + e ** ( -1 * x)))
def cost_function(theta, X, y, _lam):
    # hypothesis for every training example
    h = pd.Series(np.dot( theta.T, X.T ).T)
    
    # log(sigmoid(x)) for all of the hypothesis elements
    h1 = sigmoid(h).apply(log)
    
    # log(1 - sigmoid(x)); the tiny offset keeps log() away from zero
    h2 = (1.0000000001 - sigmoid(h)).apply(log)
    
    # regularized cost; theta[0] (the bias term) is not regularized
    J = ( -1 / m_row ) * ( y.T.dot(h1) + ( 1 - y ).T.dot(h2)) + ( _lam / ( 2 * m_row ) * sum( theta[1:] ** 2 ))
    
    return J
def gradient_function(theta, X, y, _lam):
    # hypothesis for every training example
    h = pd.Series(np.dot( theta.T, X.T ).T)
    h = sigmoid(h)
    # gradient of the regularized cost; theta[0] is not regularized
    grad = pd.Series(np.zeros(m_column))
    grad[0] = ( 1 / m_row ) * ( ( h - y ).T.dot(X[0]).T )
    grad[1:] = ( 1 / m_row ) * ( ( h - y ).T.dot( X.T[1:].T ).T ) + ( _lam / m_row ) * theta[1:]
    
    return grad
def gradient_algo(X, y, theta, _lam):
    # plain batch gradient descent (alternative to fmin_bfgs);
    # expects alpha, iterations and last_j to be defined globally
    global last_j
    for n in range(iterations):
        
        # gradient of the cost at the current theta
        grad = gradient_function(theta, X, y, _lam)
        # take one descent step
        theta = theta - alpha * ( grad )
        
        # record the cost at every iteration
        last_j[n] = cost_function(theta, X, y, _lam)
        
    return theta
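As a follow-up (not in the original notebook), the optimized parameters can be turned into 0/1 predictions to estimate the training-set accuracy; a minimal sketch assuming xopt and X_new from the cells above:

In [ ]:
    
# predict y = 1 wherever theta' * x is non-negative (sigmoid >= 0.5),
# then compare against the labels
pred = (X_new.dot(xopt) >= 0).astype(int)
accuracy = (pred == y).mean()
print('Training accuracy: {:.1%}'.format(accuracy))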
    
In [ ]: